Setup
Maps with ggplot2
Maps are polygons and can be plotted using ggplot2::geom_map().
Below I illustrate this with
- a simple map of the world and
- a more customized version of Italy.
long lat group order region subregion
1 -69.89912 12.45200 1 1 Aruba <NA>
2 -69.89571 12.42300 1 2 Aruba <NA>
3 -69.94219 12.43853 1 3 Aruba <NA>
Code

long lat group order region subregion
1 11.83295 46.50011 1 1 Bolzano-Bozen <NA>
2 11.81089 46.52784 1 2 Bolzano-Bozen <NA>
3 11.73068 46.51890 1 3 Bolzano-Bozen <NA>
Code
# define a custom colour palette and recycle the colours
palette <- brewer.pal(8, "Dark2")
my_palette <- rep(palette, length.out = 100)
ggplot(data=italy_map) +
geom_map(map=italy_map, aes(map_id=region, fill=region)) +
expand_limits(x = italy_map$long, y = italy_map$lat) +
scale_fill_manual(values = my_palette) +
theme(legend.position = "none") +
ggtitle('Italy with custom colours')
Overlap with Data
Projecting external data onto geographic maps is easy. We just need to make sure that the data contains a variable (map_id) that corresponds to the region variable in the map.
First we show this for a standard data set that comes with R: USArrests
Code
# map of US states as polygon
states_map <- ggplot2::map_data("state")
# simple map of regions
p <- ggplot(data=states_map) +
geom_map(map=states_map, aes(map_id=region, fill=region)) +
expand_limits(x = states_map$long, y = states_map$lat) +
ggtitle('US states with colour-coded regions') +
theme(legend.position = "none")
p
Code
Murder Assault UrbanPop Rape state
Alabama 13.2 236 58 21.2 alabama
Alaska 10.0 263 48 44.5 alaska
Arizona 8.1 294 80 31.0 arizona
long lat group order region subregion
1 -87.46201 30.38968 1 1 alabama <NA>
2 -87.48493 30.37249 1 2 alabama <NA>
3 -87.52503 30.37249 1 3 alabama <NA>
Code

More Examples
Next I repeat the same for other data sets: gapminder and a custom data set with emigration data from an Excel file.
Notice that some countries are missing because map by name failed (e.g. USA). This should be fixed, but I leave it here to highlight the challenge.
Code

Code
# read emigration data from Excel file
fn = "data/20200501_emigrant_remittance_data_visualization_raw.xlsx"
emigration_data = readxl::read_excel(path = fn)
#simplify the column names
colnames(emigration_data)=c("country", "Pop2019", "Emi2019", "Share_t", "Share_i", "Remit2019_V", "Remit2019_GDP")
# using the orginal world_map (long, lat, region) and data emigration_data
# where emigration_data$country is used to map to world_map$region
ggplot(data=emigration_data) +
geom_map(map=world_map,
aes(fill=Emi2019, map_id=country), colour = "#7f7f7f") +
expand_limits(x = world_map$long, y = world_map$lat) +
ggtitle('world_map with own data(Emi2019)')
Cooordinates: fine tuning
There are various coordinate systems. We can use coord_sf() to make specific choices for beautification.
Here I revisit the plot of USA murder statistics from above
Code
# crs = 5070 is a Conus Albers projection for North America, see: https://epsg.io/5070
# default_crs = 4326 tells coord_sf() that the input map data are in longitude-latitude format
coords <- ggplot2::coord_sf(
crs = 5070, default_crs = 4326,
xlim = c(-125, -70), ylim = c(25, 52)
)
# p was the defined above
p + coords
Multiple Maps
Using a long data format and facet_wrap we can also plot multiple maps.
sf format and geom_sf
sf is a common data standard for simple feature objects such as geographical data.
The sf package provides many tools for reading and transforming sf data.
ggplot2 provides a convenient interface geom_sf to plot such data.
Rows: 100
Columns: 15
$ AREA <dbl> 0.114, 0.061, 0.143, 0.070, 0.153, 0.097, 0.062, 0.091, 0.11…
$ PERIMETER <dbl> 1.442, 1.231, 1.630, 2.968, 2.206, 1.670, 1.547, 1.284, 1.42…
$ CNTY_ <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1836, 1837, …
$ CNTY_ID <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1836, 1837, …
$ NAME <chr> "Ashe", "Alleghany", "Surry", "Currituck", "Northampton", "H…
$ FIPS <chr> "37009", "37005", "37171", "37053", "37131", "37091", "37029…
$ FIPSNO <dbl> 37009, 37005, 37171, 37053, 37131, 37091, 37029, 37073, 3718…
$ CRESS_ID <int> 5, 3, 86, 27, 66, 46, 15, 37, 93, 85, 17, 79, 39, 73, 91, 42…
$ BIR74 <dbl> 1091, 487, 3188, 508, 1421, 1452, 286, 420, 968, 1612, 1035,…
$ SID74 <dbl> 1, 0, 5, 1, 9, 7, 0, 0, 4, 1, 2, 16, 4, 4, 4, 18, 3, 4, 1, 1…
$ NWBIR74 <dbl> 10, 10, 208, 123, 1066, 954, 115, 254, 748, 160, 550, 1243, …
$ BIR79 <dbl> 1364, 542, 3616, 830, 1606, 1838, 350, 594, 1190, 2038, 1253…
$ SID79 <dbl> 0, 3, 6, 2, 3, 5, 2, 2, 2, 5, 2, 5, 4, 4, 6, 17, 4, 7, 1, 0,…
$ NWBIR79 <dbl> 19, 12, 260, 145, 1197, 1237, 139, 371, 844, 176, 597, 1369,…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-81.47276 3..., MULTIPOLYGON ((…


Maps from rnaturalearth
There are additional libraries to retrieve Geo-coordinates at various resolutions and formats.
Code
# a world map from the rnaturalearth library in sf format
# this includes a geometry field per region (defined as MULTIPOLYGON)
# notice that 'world' also contains additional data such as population 'pop_est'
world <- rnaturalearth::ne_countries(scale = "medium", returnclass = "sf")
world %>% glimpse()Rows: 241
Columns: 64
$ scalerank <int> 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 3, 1, 5, 3, 1, 1, 1, 1, 1, 1,…
$ featurecla <chr> "Admin-0 country", "Admin-0 country", "Admin-0 country", "A…
$ labelrank <dbl> 5, 3, 3, 6, 6, 6, 6, 4, 2, 6, 4, 4, 5, 6, 6, 2, 4, 5, 6, 2,…
$ sovereignt <chr> "Netherlands", "Afghanistan", "Angola", "United Kingdom", "…
$ sov_a3 <chr> "NL1", "AFG", "AGO", "GB1", "ALB", "FI1", "AND", "ARE", "AR…
$ adm0_dif <dbl> 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0,…
$ level <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
$ type <chr> "Country", "Sovereign country", "Sovereign country", "Depen…
$ admin <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ adm0_a3 <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ geou_dif <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ geounit <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ gu_a3 <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ su_dif <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ subunit <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ su_a3 <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ brk_diff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ name <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ name_long <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ brk_a3 <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ brk_name <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ brk_group <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ abbrev <chr> "Aruba", "Afg.", "Ang.", "Ang.", "Alb.", "Aland", "And.", "…
$ postal <chr> "AW", "AF", "AO", "AI", "AL", "AI", "AND", "AE", "AR", "ARM…
$ formal_en <chr> "Aruba", "Islamic State of Afghanistan", "People's Republic…
$ formal_fr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ note_adm0 <chr> "Neth.", NA, NA, "U.K.", NA, "Fin.", NA, NA, NA, NA, "U.S.A…
$ note_brk <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Multiple claim…
$ name_sort <chr> "Aruba", "Afghanistan", "Angola", "Anguilla", "Albania", "A…
$ name_alt <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ mapcolor7 <dbl> 4, 5, 3, 6, 1, 4, 1, 2, 3, 3, 4, 4, 1, 7, 2, 1, 3, 1, 2, 3,…
$ mapcolor8 <dbl> 2, 6, 2, 6, 4, 1, 4, 1, 1, 1, 5, 5, 2, 5, 2, 2, 1, 6, 2, 2,…
$ mapcolor9 <dbl> 2, 8, 6, 6, 1, 4, 1, 3, 3, 2, 1, 1, 2, 9, 5, 2, 3, 5, 5, 1,…
$ mapcolor13 <dbl> 9, 7, 1, 3, 6, 6, 8, 3, 13, 10, 1, NA, 7, 11, 5, 7, 4, 8, 8…
$ pop_est <dbl> 103065, 28400000, 12799293, 14436, 3639453, 27153, 83888, 4…
$ gdp_md_est <dbl> 2258.0, 22270.0, 110300.0, 108.9, 21810.0, 1563.0, 3660.0, …
$ pop_year <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ lastcensus <dbl> 2010, 1979, 1970, NA, 2001, NA, 1989, 2010, 2010, 2001, 201…
$ gdp_year <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ economy <chr> "6. Developing region", "7. Least developed region", "7. Le…
$ income_grp <chr> "2. High income: nonOECD", "5. Low income", "3. Upper middl…
$ wikipedia <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fips_10 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ iso_a2 <chr> "AW", "AF", "AO", "AI", "AL", "AX", "AD", "AE", "AR", "AM",…
$ iso_a3 <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALA", "AND", "ARE", "AR…
$ iso_n3 <chr> "533", "004", "024", "660", "008", "248", "020", "784", "03…
$ un_a3 <chr> "533", "004", "024", "660", "008", "248", "020", "784", "03…
$ wb_a2 <chr> "AW", "AF", "AO", NA, "AL", NA, "AD", "AE", "AR", "AM", "AS…
$ wb_a3 <chr> "ABW", "AFG", "AGO", NA, "ALB", NA, "ADO", "ARE", "ARG", "A…
$ woe_id <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ adm0_a3_is <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALA", "AND", "ARE", "AR…
$ adm0_a3_us <chr> "ABW", "AFG", "AGO", "AIA", "ALB", "ALD", "AND", "ARE", "AR…
$ adm0_a3_un <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ adm0_a3_wb <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ continent <chr> "North America", "Asia", "Africa", "North America", "Europe…
$ region_un <chr> "Americas", "Asia", "Africa", "Americas", "Europe", "Europe…
$ subregion <chr> "Caribbean", "Southern Asia", "Middle Africa", "Caribbean",…
$ region_wb <chr> "Latin America & Caribbean", "South Asia", "Sub-Saharan Afr…
$ name_len <dbl> 5, 11, 6, 8, 7, 5, 7, 20, 9, 7, 14, 10, 23, 22, 17, 9, 7, 1…
$ long_len <dbl> 5, 11, 6, 8, 7, 13, 7, 20, 9, 7, 14, 10, 27, 35, 19, 9, 7, …
$ abbrev_len <dbl> 5, 4, 4, 4, 4, 5, 4, 6, 4, 4, 9, 4, 7, 10, 6, 4, 5, 4, 4, 5…
$ tiny <dbl> 4, NA, NA, NA, NA, 5, 5, NA, NA, NA, 3, NA, NA, 2, 4, NA, N…
$ homepart <dbl> NA, 1, 1, NA, 1, NA, 1, 1, 1, 1, NA, 1, NA, NA, 1, 1, 1, 1,…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-69.89912 1..., MULTIPOLYGON (…
Code

Adding new data
We can add new data to the sf object …
Code
# here we join additional data into the world map
D=world %>% inner_join(gapminder, by=c("name"="country"))
# D now contains coordinate `geometry` from world and
# additional data from gapminder - we plot it again with geom_sf()
p <- D %>%
# keep plot size manageable
filter(year>=1992) %>%
ggplot(data = .) +
# add labels to be shown in plotly hover
geom_sf(aes(fill=log10(pop),label=name, label1=year)) +
# custom colour scheme
scale_fill_gradient2(low="white", high="red", midpoint=6) +
# default map
coord_sf(crs = st_crs("+proj=longlat +datum=WGS84")) +
# Mercator map
# coord_sf(crs = st_crs(3857)) +
# separate plot for each year
facet_wrap(~year) +
ggtitle('Map of world with gapminder data')
#p
# use plotly for more interactivity - just because we can
ggplotly(p)There is more
Maps with maps
Map Manipulation with maps and sf
Code
# convert map object (list) to sf object (data frame)
counties <- st_as_sf(map("county", plot = FALSE, fill = TRUE))
#states <- st_as_sf(map("state", plot = FALSE, fill = TRUE)) # unused here
# subset US counties in Florida and check if polygon is valid (not for all!)
counties <- counties %>%
subset(grepl("florida", counties$ID)) %>%
filter(st_is_valid(.))
# Calculate the county area
counties$area <- as.numeric(st_area(counties))
# ggplot with geom_sf() - a more modern version of geom_map
ggplot(data = world) +
# first show map and borders beyond Florida counties
geom_sf() +
# now overlay the filled counties of Florida
geom_sf(data = counties, aes(fill = area)) +
# chose colour scheme
scale_fill_viridis_c(trans = "sqrt", alpha = .4) +
# define coordinates
coord_sf(xlim = c(-88, -78), ylim = c(24.5, 33), expand = FALSE)



